package com.esd.util;

import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.annotation.Resource;

import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;

import com.esd.common.CatDao;
import com.esd.common.MongoDBUtil;
import com.esd.config.PageConfig;
import com.esd.download.SimpleConectionListener;
import com.esd.verifyCode.demo1.AutoDiscern;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.Cache;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.FormEncodingType;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.ScriptResult;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.WebWindow;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;

public class HtmlunitDownLoadHtml {
	// log4j logger for this class.
	private static Logger log = Logger.getLogger(HtmlunitDownLoadHtml.class);
	// HtmlUnit client held in static state: created by the constructor when absent, replaced
	// with a new instance on every download(...) call, and closed by close() as well as by the
	// finally block of download(...). Nothing in this class synchronizes access to it.
	private static WebClient webClient = null;

	/**
	 * Manual smoke test: downloads one sample page through {@link #download(String)}.
	 * The returned document is discarded; progress is only visible through the output
	 * that {@code download} itself prints.
	 *
	 * @param args ignored
	 * @throws URISyntaxException declared because {@link #download(String)} declares it
	 */
	public static void main(String[] args) throws URISyntaxException {
		// Other sample pages that were kept here for manual runs:
		//   http://spjc.bjmtg.gov.cn/hrapp/ProcessType/ProcessSortServlet?NextToDo=tdSort&Parm=E67BB630-DC17-11DF-B630-B2678DA151BA
		//   http://www.bjmtg.gov.cn/hdpt/sjxx/
		//   http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?operation=view1&defname=政府采购质疑投诉
		//   http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?operation=view1&defname=%D5%FE%B8%AE%B2%C9%B9%BA%D6%CA%D2%C9%CD%B6%CB%DF
		String sampleUrl = "http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?operation=view&deptId=1000000153&id=ACFA5EE0-0BF5-11E4-9EE0-C632F97077BE";
		download(sampleUrl);
	}

	/**
	 * Closes the shared client, if there is one, and forgets it.
	 * Clearing the field matters because the constructor only builds a client when the
	 * field is {@code null}; keeping a reference to a closed client would stop it from
	 * ever creating a usable replacement.
	 */
	public static void close() {
		WebClient client = webClient;
		webClient = null;
		if (client != null) {
			client.close();
		}
	}

	/**
	 * Ensures the shared static client exists. When none is present, a Chrome-flavoured
	 * client is created, stored in the static field, and handed to a new
	 * {@code SimpleConectionListener}; otherwise nothing happens.
	 */
	public HtmlunitDownLoadHtml() {
		if (webClient != null) {
			return;
		}
		WebClient created = new WebClient(BrowserVersion.CHROME);
		webClient = created;
		// NOTE(review): the listener instance is not retained here; presumably its
		// constructor attaches itself to the client - confirm in SimpleConectionListener.
		new SimpleConectionListener(created);
	}

	/**
	 * Fetches {@code url} with a new HtmlUnit client (JavaScript and CSS disabled, 10 s
	 * timeout) and returns the page as a Jsoup document in which the recognised
	 * {@code javascript:} links have been rewritten to plain URLs.
	 *
	 * <p>If the response is not an HTML page, the returned document is an empty page whose
	 * body holds a single script that redirects the browser to {@code url}.
	 *
	 * <p>Every distinct link target of {@code a[href]}, {@code area[href]} and
	 * {@code iframe[src]} elements is passed through {@code CatDao.filterSuffix}; targets
	 * that come back as {@code null} are ignored, the rest are either rewritten (when they
	 * start with {@code javascript:}) or left alone.
	 *
	 * <p>The client is stored in the static {@code webClient} field for the duration of the
	 * call and closed before returning.
	 *
	 * @param url absolute URL of the page to fetch
	 * @return the parsed and rewritten document, or {@code null} when the request fails with
	 *         an {@link IOException} (including a malformed URL) or a
	 *         {@code FailingHttpStatusCodeException}
	 * @throws URISyntaxException never thrown by this implementation; kept for callers
	 */
	public static Document download(String url) throws URISyntaxException {
		CatDao dao = new CatDao();
		String index = "http://www.bjmtg.gov.cn/,http://www.61696156.gov.cn/,http://spjc.bjmtg.gov.cn/";
		String[] domain = index.split(",");
		try {
			long start = System.currentTimeMillis();
			webClient = new WebClient(BrowserVersion.CHROME);
			webClient.getOptions().setJavaScriptEnabled(false); // turn the JS interpreter off
			webClient.getOptions().setCssEnabled(false); // no CSS support needed
			webClient.getOptions().setThrowExceptionOnScriptError(false); // do not throw on JS errors
			webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
			webClient.getOptions().setTimeout(10000); // connection timeout in milliseconds

			WebWindow firstWindow = webClient.getWebWindows().get(0);
			WebWindow pageWindow = webClient.openWindow(new URL(url), "page", firstWindow);
			Page page = pageWindow.getEnclosedPage();

			Document document;
			if (page.isHtmlPage()) {
				document = Jsoup.parse(((HtmlPage) page).asXml(), url);
			} else {
				// Non-HTML response: hand back a stub page that redirects to the original URL.
				document = Jsoup.parse("");
				Elements elements = document.getElementsByTag("body");
				String script = "<script type=\"text/javascript\">window.location.href='" + url + "'</script>";
				elements.get(0).append(script);
			}

			// Rewrite in-page javascript: navigation into real URLs.
			Elements links = document.select("a[href],area[href],iframe[src]");
			// A hash set tolerates the null values filterSuffix may return; the previous
			// list scan called equals() on stored nulls and threw a NullPointerException.
			java.util.Set<String> seen = new java.util.HashSet<>();
			for (Element e : links) {
				System.out.println("**e**:"+e.toString());
				String href = linkTarget(e);
				if (href.isEmpty()) {
					// No usable target on this element (the old null check could never fire).
					continue;
				}
				String s = dao.filterSuffix(href, domain);
				if (!seen.add(s)) {
					continue; // already handled this target
				}
				if (s == null) {
					continue;
				}
				// Scoped per link so an unrecognised javascript: link no longer reports the
				// URL computed for an earlier link.
				String jsurl = s.startsWith("javascript:") ? rewriteJavascriptLink(document, s) : s;
				System.out.println("jsurl:"+jsurl);
			}
			long end = System.currentTimeMillis();
			System.out.println(end - start);
			return document;
		} catch (FailingHttpStatusCodeException | IOException e) {
			// Pass the throwable as the second argument so log4j records the stack trace.
			log.error("Failed to download " + url, e);
		} finally {
			if (webClient != null) {
				webClient.close();
			}
		}
		return null;

	}

	/**
	 * Returns the first non-empty value among the element's absolute {@code href}, its raw
	 * {@code href} and its absolute {@code src}, trimmed; empty when none is set.
	 */
	private static String linkTarget(Element e) {
		String href = e.attr("abs:href").trim();
		if (href.isEmpty()) {
			href = e.attr("href").trim();
		}
		if (href.isEmpty()) {
			href = e.attr("abs:src").trim();
		}
		return href;
	}

	/**
	 * Replaces the {@code href} of every element whose href equals {@code s} with the real
	 * URL that the given {@code javascript:} call stands for.
	 *
	 * @return the replacement URL, or {@code null} when {@code s} matches no known pattern
	 */
	private static String rewriteJavascriptLink(Document document, String s) {
		if (s.startsWith("javascript:showDetail333")) {
			return replaceHref(document, s, "http://spjc.bjmtg.gov.cn/hrapp/webanser/savebody.jsp");
		}
		if (s.startsWith("javascript:ckxq()")) {
			return rewriteCkxqLink(document, s);
		}
		String jsurl = null;
		// Arguments may be quoted with either ' or "; both are tried, single quotes first,
		// and a later match overrides an earlier one.
		for (String quote : new String[] { "'", "\"" }) {
			String[] parts = s.split(quote);
			if (parts.length <= 1) {
				continue;
			}
			String candidate = null;
			if (s.startsWith("javascript:showDetail666")) {
				candidate = "http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?operation=view1&defname=" + encodeChinese(parts[1]);
			} else if (s.startsWith("javascript:showDetail(")) {
				candidate = showDetailTarget(parts);
			} else if (s.startsWith("javascript:showDetailpolicy(")) {
				candidate = "http://spjc.bjmtg.gov.cn/publicaction?operation=showPolicyInfo&lngitem=" + parts[1];
			} else if (s.startsWith("javascript:showList(")) {
				candidate = "http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?operation=list&Station=tgfwdwbj&deptId=" + parts[1];
			}
			if (candidate != null) {
				jsurl = replaceHref(document, s, candidate);
			}
		}
		return jsurl;
	}

	/**
	 * Builds the target of a {@code javascript:ckxq()} link from the page's {@code defname}
	 * field and the {@code operation} field of the {@code selForm} element. Returns
	 * {@code null}, leaving the link untouched, when either field is missing.
	 */
	private static String rewriteCkxqLink(Document document, String s) {
		Elements defnameFields = document.getElementsByAttributeValue("name", "defname");
		Element form = document.getElementById("selForm");
		Elements operationFields = form == null ? null : form.getElementsByAttributeValue("name", "operation");
		if (defnameFields.isEmpty() || operationFields == null || operationFields.isEmpty()) {
			log.warn("Cannot rewrite " + s + ": defname or selForm/operation field not found");
			return null;
		}
		String defname = encodeChinese1(defnameFields.get(0).val());
		String jsurl = "http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?defname="+ defname +"&operation="+operationFields.get(0).val();
		System.out.println("*********jsurl**********" + jsurl);
		return replaceHref(document, s, jsurl);
	}

	/**
	 * Maps the quote-split pieces of a {@code showDetail(...)} call to its URL: three
	 * pieces mean a single article id; seven pieces mean (id, deptId, type) where type
	 * {@code 1} is the plain view and type {@code 2} the same view with an {@code #isml}
	 * fragment. Any other shape yields {@code null}.
	 */
	private static String showDetailTarget(String[] parts) {
		if (parts.length == 3) {
			return "http://spjc.bjmtg.gov.cn/hrapp/article/articleDetail.jsp?id=" + parts[1];
		}
		if (parts.length == 7) {
			String view = "http://spjc.bjmtg.gov.cn/hrapp/weboffice/proceedingCtrl?operation=view&deptId=" + parts[3] + "&id=" + parts[1];
			if (parts[5].equals("1")) {
				return view;
			}
			if (parts[5].equals("2")) {
				return view + "#isml";
			}
		}
		return null;
	}

	/** Sets {@code href} to {@code newHref} on every element whose href is {@code oldHref}; returns {@code newHref}. */
	private static String replaceHref(Document document, String oldHref, String newHref) {
		document.getElementsByAttributeValue("href", oldHref).attr("href", newHref);
		return newHref;
	}

	/**
	 * Reads the stream to its end and returns the content as text, decoded with the
	 * platform default charset.
	 *
	 * <p>All bytes are collected first and decoded in one step. Decoding each 4096-byte
	 * chunk on its own corrupts any multi-byte character that straddles a chunk boundary.
	 *
	 * @param in stream to drain; it is not closed by this method
	 * @return the decoded content, empty when the stream has no data
	 * @throws IOException if reading from {@code in} fails
	 */
	public static String inputStream2String(InputStream in) throws IOException {
		java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
		byte[] b = new byte[4096];
		for (int n; (n = in.read(b)) != -1;) {
			collected.write(b, 0, n);
		}
		// toString() decodes with the platform default charset, as new String(byte[]) did.
		return collected.toString();
	}

	/**
	 * URL-encodes (GBK) every {@code =}-separated segment after the first one for which
	 * {@code Util.isChineseCharacter} returns true, then turns any {@code %26} back into
	 * {@code &}. The first segment is always left as it is.
	 *
	 * <p>The input is split with a negative limit so that empty segments are kept: a value
	 * such as {@code "="} no longer fails on an empty split result, and a trailing
	 * {@code =} is preserved instead of being dropped.
	 *
	 * @param url text to process; must not be {@code null}
	 * @return the text with the selected segments encoded
	 */
	public static String encodeChinese(String url) {
		String[] zu = url.split("=", -1);
		StringBuilder strnew = new StringBuilder(zu[0]);
		for (int i = 1; i < zu.length; i++) {
			String segment = zu[i];
			if (Util.isChineseCharacter(segment)) {
				try {
					segment = java.net.URLEncoder.encode(segment, "GBK");
				} catch (UnsupportedEncodingException e) {
					// Keep the raw segment when GBK is not available on this JVM.
					log.error("GBK encoding is not supported; leaving segment unencoded", e);
				}
			}
			strnew.append('=').append(segment);
		}
		// Literal replacement; the encoder turns '&' into %26, which is undone here.
		String result = strnew.toString().replace("%26", "&");
		System.out.println("**encodeChinese:" + result);
		return result;
	}

	/**
	 * URL-encodes the whole value with GBK when {@code Util.isChineseCharacter} reports
	 * true for it, then restores every {@code %26} to {@code &}. If GBK is unavailable the
	 * stack trace is printed and the value is used unencoded.
	 *
	 * @param url value to process
	 * @return the processed value
	 */
	public static String encodeChinese1(String url) {
		String encoded = url;
		if (Util.isChineseCharacter(url)) {
			try {
				encoded = java.net.URLEncoder.encode(url, "GBK");
			} catch (UnsupportedEncodingException ex) {
				ex.printStackTrace();
			}
		}
		return encoded.replaceAll("%26", "&");
	}
}
